home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
EuroCD 3
/
EuroCD 3.iso
/
Games
/
Doom
/
ADoom-0.8
/
ADoom_src
/
amiga_draw.s
< prev
next >
Wrap
Text File
|
1998-06-24
|
37KB
|
1,605 lines
*
* amiga_draw.s - optimized rendering
* by Aki Laukkanen <amlaukka@cc.helsinki.fi>
*
* This file is public domain.
*
; mc68020
; multipass
; if (_eval(DEBUG)&$8000)
; debug on,lattice4
; endc
include "exec/types.i"
;-----------------------------------------------------------------------
; Row stride of the chunky buffer in bytes; the drawers below use it as the
; distance between vertically adjacent pixels.
SCREENWIDTH equ 320
; NOTE(review): FUZZTABLE, FUZZOFF, FRACBITS and FRACUNIT are defined here but
; no instruction visible in this file references them.
FUZZTABLE equ 250
FUZZOFF equ SCREENWIDTH
FRACBITS equ 16
FRACUNIT equ (1<<FRACBITS)
*
* global functions
*
;; xdef _R_DrawColumn_030 ; high detail
;; xdef @R_DrawColumn_030
xdef _R_DrawColumn_040 ; high detail
xdef @R_DrawColumn_040
xdef _R_DrawSpan_040
xdef @R_DrawSpan_040
xdef _R_DrawColumn_060
xdef @R_DrawColumn_060
xdef _R_DrawSpan_060
xdef @R_DrawSpan_060
xdef _R_DrawFuzzColumn
xdef @R_DrawFuzzColumn
;; xdef _R_DrawTranslatedColumn
;; xdef @R_DrawTranslatedColumn
xdef _R_DrawSpanLow ; low detail
xdef @R_DrawSpanLow
xdef _R_DrawColumnLow
xdef @R_DrawColumnLow
xdef _R_DrawFuzzColumnLow
xdef @R_DrawFuzzColumnLow
;; xdef _R_DrawTranslatedColumnLow
;; xdef @R_DrawTranslatedColumnLow
xdef _R_RenderSegLoop
xdef @R_RenderSegLoop
*
* needed symbols/labels
*
xref _dc_yl
xref _dc_yh
xref _dc_x
xref _columnofs
xref _ylookup
xref _dc_iscale
xref _centery
xref _dc_texturemid
xref _dc_source
xref _dc_colormap
xref _ds_xfrac
xref _ds_yfrac
xref _ds_x1
xref _ds_y
xref _ds_x2
xref _ds_xstep
xref _ds_ystep
xref _ds_source
xref _ds_colormap
xref _fuzzoffset
xref _fuzzpos
xref _viewheight
xref _dc_translation
xref _colormaps
;-----------------------------------------------------------------------
section text,code
; low detail drawing functions
;-----------------------------------------------------------------------
cnop 0,4
; R_DrawColumnLow - low detail textured column drawer.
; Draws rows dc_yl..dc_yh of one column. Every texel is stored as a word
; holding the same colour byte twice, so each row receives two bytes.
; Reads (a4-relative): dc_yl, dc_yh, dc_x, dc_iscale, dc_texturemid,
; dc_source, dc_colormap, centery, ylookup[], columnofs[].
; Returns without drawing when dc_yh < dc_yl.
; Saves/restores d3-d4/d6-d7/a2/a3; d0, d1, a0 and a1 are overwritten.
_R_DrawColumnLow
@R_DrawColumnLow
movem.l d3-d4/d6-d7/a2/a3,-(sp)
move.l _dc_yh(a4),d7 ; count = _dc_yh - _dc_yl
move.l _dc_yl(a4),d0
sub.l d0,d7
bmi.w .end1
move.l _dc_x(a4),d1 ; dest = ylookup[_dc_yl] + columnofs[_dc_x]
lea _ylookup(a4),a0
add.l d1,d1 ; dc_x <<= 1
move.l (a0,d0.l*4),a0
lea _columnofs(a4),a1
add.l (a1,d1.l*4),a0
move.l _dc_colormap(a4),d4
move.l _dc_source(a4),a1
move.l _dc_iscale(a4),d1 ; frac = _dc_texturemid + (_dc_yl-centery)*fracstep
sub.l _centery(a4),d0
muls.l d1,d0
add.l _dc_texturemid(a4),d0
moveq #$7f,d3
lea (SCREENWIDTH*4).w,a3
; Register plan for the unrolled loop (after the swaps below):
; d7: cnt >> 2
; a0: chunky
; a1: texture
; d0: frac (uuuu uuuu uuuu uuuu 0000 0000 0UUU UUUU)
; d1: dfrac (.......................................)
; d3: $7f
; d4: light table aligned to 256 byte boundary
; a3: SCREENWIDTH*4 (advance of a0 per pass of four rows)
;
; count & 3 selects both the entry point into the four-way unrolled loop
; and the negative bias added to a0, so that the first (possibly partial)
; pass lands on the first destination row.
move.l d7,d6
and.w #3,d6
swap d0 ; swap decimals and fraction
swap d1
add.w .width_tab1(pc,d6.w*2),a0
lsr.w #2,d7
move.w .tmap_tab1(pc,d6.w*2),d6
and.w d3,d0
; The integer step is subtracted from d0.w first so that the long add
; restores d0.w while adding the fractional step to the upper word; the
; carry of that add is left in X for the first addx.l.
sub.w d1,d0
add.l d1,d0 ; setup the X flag
jmp .loop1(pc,d6.w)
cnop 0,4
.width_tab1
dc.w -3*SCREENWIDTH
dc.w -2*SCREENWIDTH
dc.w -1*SCREENWIDTH
dc.w 0
.tmap_tab1
dc.w .01-.loop1
dc.w .11-.loop1
dc.w .21-.loop1
dc.w .31-.loop1
; Per row: the texel byte replaces the low byte of the colormap address
; held in d4, which gives the address of the shaded colour (moved to a2).
; move.w loads that byte into the upper half of d6.w, move.b repeats it in
; the lower half, and the word store writes the doubled pixel.
; addx.l advances the texture position, and.w wraps the integer part to
; 0..127.
.loop1
.31
move.b (a1,d0.w),d4
addx.l d1,d0
move.l d4,a2
move.w (a2),d6
and.w d3,d0
move.b (a2),d6
move.w d6,(a0)
.21
move.b (a1,d0.w),d4
addx.l d1,d0
move.l d4,a2
move.w (a2),d6
and.w d3,d0
move.b (a2),d6
move.w d6,SCREENWIDTH(a0)
.11
move.b (a1,d0.w),d4
addx.l d1,d0
move.l d4,a2
move.w (a2),d6
and.w d3,d0
move.b (a2),d6
move.w d6,SCREENWIDTH*2(a0)
.01
move.b (a1,d0.w),d4
addx.l d1,d0
move.l d4,a2
move.w (a2),d6
and.w d3,d0
move.b (a2),d6
move.w d6,SCREENWIDTH*3(a0)
add.l a3,a0
.loop_end1
dbf d7,.loop1
.end1
movem.l (sp)+,d3-d4/d6-d7/a2/a3
rts
;-----------------------------------------------------------------------
cnop 0,4
; R_DrawSpanLow - low detail textured span drawer.
; Draws columns ds_x1..ds_x2 of row ds_y; every texel is stored twice in
; adjacent bytes. The texel index is ((y & 63) << 6) | (x & 63).
; Reads (a4-relative): ds_y, ds_x1, ds_x2, ds_source, ds_colormap,
; ds_xfrac, ds_yfrac, ds_xstep, ds_ystep, ylookup[], columnofs[].
; Saves/restores d2-d7/a2-a4. a4 is reused as a loop end pointer once all
; variables have been loaded. d0, d1, a0 and a1 are overwritten.
_R_DrawSpanLow
@R_DrawSpanLow
movem.l d2-d7/a2-a4,-(sp)
move.l _ds_y(a4),d0
move.l _ds_x1(a4),d1 ; dest = ylookup[_ds_y] + columnofs[_ds_x1]
lea _ylookup(a4),a0
add.l d1,d1
move.l (a0,d0.l*4),a0
lea _columnofs(a4),a1
add.l (a1,d1.l*4),a0
move.l _ds_x2(a4),d7 ; count = _ds_x2 - _ds_x1
move.l _ds_source(a4),a1
add.l d7,d7
move.l _ds_colormap(a4),a2
sub.l d1,d7
addq.l #2,d7
; d7 = 2*(ds_x2 - ds_x1 + 1): number of bytes to write
move.l _ds_xfrac(a4),d0
move.l _ds_yfrac(a4),d1
move.l _ds_xstep(a4),d2
move.l _ds_ystep(a4),d3
move.l a0,d4 ; notice, that this address must already be aligned by word
btst #1,d4
beq.b .skips2
move.l d0,d5 ; do the unaligned pixels
move.l d1,d6 ; so we can write to longword
swap d5 ; boundary in the main loop
swap d6
and.w #$3f,d5
and.w #$3f,d6 ; this is the worst possible
lsl.w #6,d6 ; way but hey, this is not a loop
or.w d5,d6
move.b (a1,d6.w),d5
add.l d2,d0
move.b (a2,d5.w),(a0)+
add.l d3,d1
move.b (a2,d5.w),(a0)+ ; I know this is crap but spare me the comments
subq.l #2,d7
.skips2 move.l a2,d4
; The index word built in the loops always has bits 12-15 set ($f000)
; and is used as a signed 16 bit index, i.e. offset - $1000; the texture
; pointer is biased by +$1000 to compensate.
lea $1000(a1),a1 ; catch 22
; a3 = end of the span, a4 = end of the part handled by the unrolled loop
; (byte count rounded down to a multiple of 8)
move.l a0,a3
add.l d7,a3
move.l d7,d5
and.b #~7,d5
move.l a0,a4
add.l d5,a4
eor.w d0,d1 ; swap fraction parts for addx
eor.w d2,d3
eor.w d1,d0
eor.w d3,d2
eor.w d0,d1
eor.w d2,d3
swap d0
swap d1
swap d2
swap d3
; now d0 = yfrac.lo:x, d1 = xfrac.lo:y, d2 = ystep.lo:xstep.hi,
; d3 = xstep.lo:ystep.hi; the y integer parts are moved up to bits 6..11
lsl.w #6,d1
lsl.w #6,d3
move.w #$ffc0,d6
move.w #$f03f,d7
lsr.w #3,d5
beq.b .skip_loop22
sub.w d2,d0
add.l d2,d0 ; setup the X flag
; Unrolled loop: four texels per pass, each written as a doubled byte;
; two long stores (8 bytes) per pass.
.loop22 or.w d6,d0 ; Not really an exercise in optimizing
or.w d7,d1 ; but I guess it's faster than 1x1 for 030
and.w d1,d0 ; where this low detail business is needed.
addx.l d3,d1
move.b (a1,d0.w),d4
addx.l d2,d0
move.l d4,a2
move.w (a2),d5
or.w d6,d0
move.b (a2),d5
or.w d7,d1
and.w d1,d0
swap d5
addx.l d3,d1
move.b (a1,d0.w),d4
addx.l d2,d0
move.l d4,a2
move.w (a2),d5
or.w d6,d0
move.b (a2),d5
or.w d7,d1
and.w d1,d0
move.l d5,(a0)+
addx.l d3,d1
move.b (a1,d0.w),d4
addx.l d2,d0
move.l d4,a2
move.w (a2),d5
or.w d6,d0
move.b (a2),d5
or.w d7,d1
and.w d1,d0
swap d5
addx.l d3,d1
move.b (a1,d0.w),d4
addx.l d2,d0
move.l d4,a2
move.w (a2),d5
move.b (a2),d5
move.l d5,(a0)+
cmp.l a0,a4
bne.b .loop22
.skip_loop22
sub.w d2,d0
add.l d2,d0
bra.b .loop_end22
; Tail loop: one texel (two bytes) per pass until a0 reaches a3.
.loop32 or.w d6,d0
or.w d7,d1
and.w d1,d0
addx.l d3,d1
move.b (a1,d0.w),d4
addx.l d2,d0
move.l d4,a2
move.b (a2),(a0)+
move.b (a2),(a0)+
.loop_end22
cmp.l a0,a3
bne.b .loop32
.end22 movem.l (sp)+,d2-d7/a2-a4
rts
;-----------------------------------------------------------------------
; R_DrawTranslatedColumnLow - low detail column drawer with colour
; translation.
; Draws rows dc_yl..dc_yh of one column. Each texel is first mapped through
; dc_translation, then through dc_colormap, and stored as a word holding
; the resulting colour byte twice (two bytes per row).
; Reads (a4-relative): dc_yl, dc_yh, dc_x, dc_iscale, dc_texturemid,
; dc_source, dc_colormap, dc_translation, centery, ylookup[], columnofs[].
; Returns without drawing when dc_yh < dc_yl.
; Saves/restores d2-d4/d6-d7/a2/a3; d0, d1, a0 and a1 are overwritten.
; The xdef lines for this routine are commented out near the top of the
; file, so the symbol is not exported.
	cnop	0,4
_R_DrawTranslatedColumnLow
@R_DrawTranslatedColumnLow
	movem.l	d2-d4/d6-d7/a2/a3,-(sp)
	move.l	_dc_yh(a4),d7		; count = _dc_yh - _dc_yl
	move.l	_dc_yl(a4),d0
	sub.l	d0,d7
	bmi.w	.end3
	move.l	_dc_x(a4),d1		; dest = ylookup[_dc_yl] + columnofs[_dc_x*2]
	lea	_ylookup(a4),a0
	add.l	d1,d1			; dc_x <<= 1
	move.l	(a0,d0.l*4),a0
	lea	_columnofs(a4),a1
	add.l	(a1,d1.l*4),a0
	move.l	_dc_translation(a4),d2
	move.l	_dc_colormap(a4),d4
	move.l	_dc_source(a4),a1
	move.l	_dc_iscale(a4),d1	; frac = _dc_texturemid + (_dc_yl-centery)*fracstep
	sub.l	_centery(a4),d0
	muls.l	d1,d0
	add.l	_dc_texturemid(a4),d0
	moveq	#$7f,d3
	lea	(SCREENWIDTH*4).w,a3
; Register plan for the unrolled loop (after the swaps below):
; d7: cnt >> 2
; a0: chunky
; a1: texture
; d0: frac  (uuuu uuuu uuuu uuuu 0000 0000 0UUU UUUU)
; d1: dfrac (.......................................)
; d3: $7f
; d4: light table aligned to 256 byte boundary
; d2: translation table aligned to 256 byte boundary
; a3: SCREENWIDTH*4 (advance of a0 per pass of four rows)
;
; count & 3 selects both the entry point into the four-way unrolled loop
; and the negative bias added to a0.
	move.l	d7,d6
	and.w	#3,d6
	swap	d0			; swap decimals and fraction
	swap	d1
	add.w	.width_tab3(pc,d6.w*2),a0
	lsr.w	#2,d7
	move.w	.tmap_tab3(pc,d6.w*2),d6
	and.w	d3,d0
	sub.w	d1,d0			; undone by the long add below, which
	add.l	d1,d0			; leaves the fraction carry in X
	jmp	.loop3(pc,d6.w)
	cnop	0,4
.width_tab3
	dc.w	-3*SCREENWIDTH
	dc.w	-2*SCREENWIDTH
	dc.w	-1*SCREENWIDTH
	dc.w	0
.tmap_tab3
	dc.w	.03-.loop3
	dc.w	.13-.loop3
	dc.w	.23-.loop3
	dc.w	.33-.loop3
; Per row: texel -> low byte of the translation table address (d2),
; translated byte -> low byte of the colormap address (d4); move.w/move.b
; build a word with the shaded colour in both bytes.
.loop3
.33
	move.b	(a1,d0.w),d2
	move.l	d2,a2
	addx.l	d1,d0
	move.b	(a2),d4
	move.l	d4,a2
	and.w	d3,d0
	move.w	(a2),d6
	move.b	(a2),d6
	move.w	d6,(a0)
.23
	move.b	(a1,d0.w),d2
	move.l	d2,a2
	addx.l	d1,d0
	move.b	(a2),d4
	move.l	d4,a2
	and.w	d3,d0
	move.w	(a2),d6
	move.b	(a2),d6
	move.w	d6,SCREENWIDTH(a0)
.13
	move.b	(a1,d0.w),d2
	move.l	d2,a2
	addx.l	d1,d0
	move.b	(a2),d4
	move.l	d4,a2
	and.w	d3,d0
	move.w	(a2),d6
	move.b	(a2),d6
	move.w	d6,SCREENWIDTH*2(a0)
.03
	move.b	(a1,d0.w),d2
	move.l	d2,a2
	addx.l	d1,d0
	move.b	(a2),d4
	move.l	d4,a2
	and.w	d3,d0
	move.w	(a2),d6
	move.b	(a2),d6
	; Word store like the other three taps: the original wrote only one
	; byte here, leaving half of every fourth doubled pixel undrawn.
	move.w	d6,SCREENWIDTH*3(a0)
	add.l	a3,a0
.loop_end3
	dbf	d7,.loop3
.end3
	movem.l	(sp)+,d2-d4/d6-d7/a2/a3
	rts
;-----------------------------------------------------------------------
cnop 0,4
; R_DrawFuzzColumnLow - low detail "fuzz" column drawer.
; For every row it reads a byte already in the chunky buffer, at the
; destination plus an offset taken from the fuzzoffset table, maps it
; through the table at colormaps + 6*256 and stores the result as a
; doubled byte (one word per row).
; dc_yh is lowered to viewheight-2 when it equals viewheight-1 and a
; dc_yl of 0 is raised to 1 before the row count is computed.
; Reads (a4-relative): viewheight, dc_yl, dc_yh, dc_x, colormaps,
; fuzzoffset[], fuzzpos, ylookup[], columnofs[].
; Writes fuzzpos on exit (kept as a byte offset into fuzzoffset).
; Saves/restores d4/d6-d7/a2/a3; d0, d1, a0 and a1 are overwritten.
_R_DrawFuzzColumnLow
@R_DrawFuzzColumnLow
movem.l d4/d6-d7/a2/a3,-(sp)
move.l _viewheight(a4),d1
subq.l #1,d1
move.l _dc_yh(a4),d7 ; count = _dc_yh - _dc_yl
cmp.l d1,d7
bne.b .skip_yh4
subq.l #1,d1
move.l d1,d7
.skip_yh4
move.l _dc_yl(a4),d0
bne.b .skip_yl4
moveq #1,d0
.skip_yl4
sub.l d0,d7
bmi.w .end4
move.l _dc_x(a4),d1 ; dest = ylookup[_dc_yl] + columnofs[_dc_x]
lea _ylookup(a4),a0
add.l d1,d1
move.l (a0,d0.l*4),a0
lea _columnofs(a4),a1
add.l (a1,d1.l*4),a0
move.l _colormaps(a4),d4
add.l #6*256,d4
lea _fuzzoffset(a4),a1
; reduce the low word of fuzzpos to the range 0..199 by repeated subtraction
move.l _fuzzpos(a4),d0 ; bring it down
.pos_loop4 sub.w #200,d0
bpl .pos_loop4
add.w #200,d0
add.l d0,a1
lea (SCREENWIDTH*4).w,a3
; Register plan for the unrolled loop:
; d7: cnt >> 2
; a0: chunky
; a1: read pointer into fuzzoffset (longword entries)
; d4: colormaps + 6*256; the low byte is replaced by the pixel read, so
;     the table has to be aligned to a 256 byte boundary
; a3: SCREENWIDTH*4 (advance of a0 per pass of four rows)
; count & 3 selects the entry point into the unrolled loop and the
; matching negative bias for a0.
move.l d7,d6
and.w #3,d6
add.w .width_tab4(pc,d6.w*2),a0
lsr.w #2,d7
move.w .tmap_tab4(pc,d6.w*2),d6
jmp .loop4(pc,d6.w)
cnop 0,4
.width_tab4
dc.w -3*SCREENWIDTH
dc.w -2*SCREENWIDTH
dc.w -1*SCREENWIDTH
dc.w 0
.tmap_tab4
dc.w .04-.loop4
dc.w .14-.loop4
dc.w .24-.loop4
dc.w .34-.loop4
.loop4
.34 move.l a0,a2 ; This is essentially
add.l (a1)+,a2 ; just moving memory around.
move.b (a2),d4
move.l d4,a2
move.w (a2),d6
move.b (a2),d6
move.w d6,(a0)
.24 lea SCREENWIDTH(a0),a2
add.l (a1)+,a2
move.b (a2),d4
move.l d4,a2
move.w (a2),d6
move.b (a2),d6
move.w d6,SCREENWIDTH(a0)
.14 lea 2*SCREENWIDTH(a0),a2
add.l (a1)+,a2
move.b (a2),d4
move.l d4,a2
move.w (a2),d6
move.b (a2),d6
move.w d6,2*SCREENWIDTH(a0)
.04 lea 3*SCREENWIDTH(a0),a2
add.l (a1)+,a2
move.b (a2),d4
move.l d4,a2
move.w (a2),d6
move.b (a2),d6
move.w d6,3*SCREENWIDTH(a0)
add.l a3,a0
.loop_end4
dbf d7,.loop4
; store the new table position as a byte offset from the start of fuzzoffset
; NOTE(review): these two instructions address _fuzzoffset/_fuzzpos
; absolutely, while the loads above are a4-relative - confirm both forms
; resolve to the same variables in the build.
sub.l #_fuzzoffset,a1
move.l a1,_fuzzpos
.end4
movem.l (sp)+,d4/d6-d7/a2/a3
rts
;-----------------------------------------------------------------------
; high detail versions
;-----------------------------------------------------------------------
cnop 0,4
; R_DrawFuzzColumn - high detail "fuzz" column drawer.
; For every row it reads a byte already in the chunky buffer, at the
; destination plus an offset taken from the fuzzoffset table, maps it
; through the table at colormaps + 6*256 and stores one byte.
; dc_yh is lowered to viewheight-2 when it equals viewheight-1 and a
; dc_yl of 0 is raised to 1 before the row count is computed.
; Reads (a4-relative): viewheight, dc_yl, dc_yh, dc_x, colormaps,
; fuzzoffset[], fuzzpos, ylookup[], columnofs[].
; Writes fuzzpos on exit (kept as a byte offset into fuzzoffset).
; Saves/restores d4/d6-d7/a2/a3; d0, d1, a0 and a1 are overwritten.
_R_DrawFuzzColumn
@R_DrawFuzzColumn
movem.l d4/d6-d7/a2/a3,-(sp)
move.l _viewheight(a4),d1
subq.l #1,d1
move.l _dc_yh(a4),d7 ; count = _dc_yh - _dc_yl
cmp.l d1,d7
bne.b .skip_yh5
subq.l #1,d1
move.l d1,d7
.skip_yh5
move.l _dc_yl(a4),d0
bne.b .skip_yl5
moveq #1,d0
.skip_yl5
sub.l d0,d7
bmi.w .end5
move.l _dc_x(a4),d1 ; dest = ylookup[_dc_yl] + columnofs[_dc_x]
lea _ylookup(a4),a0
move.l (a0,d0.l*4),a0
lea _columnofs(a4),a1
add.l (a1,d1.l*4),a0
move.l _colormaps(a4),d4
add.l #6*256,d4
lea _fuzzoffset(a4),a1
; reduce the low word of fuzzpos to the range 0..199 by repeated subtraction
move.l _fuzzpos(a4),d0
.pos_loop5 sub.w #200,d0
bpl .pos_loop5
add.w #200,d0
add.l d0,a1
lea (SCREENWIDTH*4).w,a3
; Register plan for the unrolled loop:
; d7: cnt >> 2
; a0: chunky
; a1: read pointer into fuzzoffset (longword entries)
; d4: colormaps + 6*256; the low byte is replaced by the pixel read, so
;     the table has to be aligned to a 256 byte boundary
; a3: SCREENWIDTH*4 (advance of a0 per pass of four rows)
; count & 3 selects the entry point into the unrolled loop and the
; matching negative bias for a0.
move.l d7,d6
and.w #3,d6
add.w .width_tab5(pc,d6.w*2),a0
lsr.w #2,d7
move.w .tmap_tab5(pc,d6.w*2),d6
jmp .loop5(pc,d6.w)
cnop 0,4
.width_tab5
dc.w -3*SCREENWIDTH
dc.w -2*SCREENWIDTH
dc.w -1*SCREENWIDTH
dc.w 0
.tmap_tab5
dc.w .05-.loop5
dc.w .15-.loop5
dc.w .25-.loop5
dc.w .35-.loop5
.loop5
.35 move.l a0,a2 ; This is essentially
add.l (a1)+,a2 ; just moving memory around.
move.b (a2),d4
move.l d4,a2 ; Not 060 optimized but
move.b (a2),(a0) ; if you have hordes of
.25 lea SCREENWIDTH(a0),a2 ; invisible monsters which
add.l (a1)+,a2 ; slow down the game too much,
move.b (a2),d4 ; do tell me.
move.l d4,a2
move.b (a2),SCREENWIDTH(a0)
.15 lea 2*SCREENWIDTH(a0),a2
add.l (a1)+,a2
move.b (a2),d4
move.l d4,a2
move.b (a2),2*SCREENWIDTH(a0)
.05 lea 3*SCREENWIDTH(a0),a2
add.l (a1)+,a2
move.b (a2),d4
move.l d4,a2
move.b (a2),3*SCREENWIDTH(a0)
add.l a3,a0
.loop_end5
dbf d7,.loop5
; store the new table position as a byte offset from the start of fuzzoffset
; NOTE(review): these two instructions address _fuzzoffset/_fuzzpos
; absolutely, while the loads above are a4-relative - confirm both forms
; resolve to the same variables in the build.
sub.l #_fuzzoffset,a1
move.l a1,_fuzzpos
.end5
movem.l (sp)+,d4/d6-d7/a2/a3
rts
;-----------------------------------------------------------------------
cnop 0,4
; R_DrawTranslatedColumn - high detail column drawer with colour translation.
; Draws rows dc_yl..dc_yh of one column; each texel is mapped through
; dc_translation and then through dc_colormap before one byte is stored.
; Reads (a4-relative): dc_yl, dc_yh, dc_x, dc_iscale, dc_texturemid,
; dc_source, dc_colormap, dc_translation, centery, ylookup[], columnofs[].
; Returns without drawing when dc_yh < dc_yl.
; Saves/restores d2-d4/d6-d7/a2/a3; d0, d1, a0 and a1 are overwritten.
; The xdef lines for this routine are commented out near the top of the
; file, so the symbol is not exported.
_R_DrawTranslatedColumn ; no 060 version :(
@R_DrawTranslatedColumn
movem.l d2-d4/d6-d7/a2/a3,-(sp)
move.l _dc_yh(a4),d7 ; count = _dc_yh - _dc_yl
move.l _dc_yl(a4),d0
sub.l d0,d7
bmi.w .end6
move.l _dc_x(a4),d1 ; dest = ylookup[_dc_yl] + columnofs[_dc_x]
lea _ylookup(a4),a0
move.l (a0,d0.l*4),a0
lea _columnofs(a4),a1
add.l (a1,d1.l*4),a0
move.l _dc_translation(a4),d2
move.l _dc_colormap(a4),d4
move.l _dc_source(a4),a1
move.l _dc_iscale(a4),d1 ; frac = _dc_texturemid + (_dc_yl-centery)*fracstep
sub.l _centery(a4),d0
muls.l d1,d0
add.l _dc_texturemid(a4),d0
moveq #$7f,d3
lea (SCREENWIDTH*4).w,a3
; Register plan for the unrolled loop (after the swaps below):
; d7: cnt >> 2
; a0: chunky
; a1: texture
; d0: frac (uuuu uuuu uuuu uuuu 0000 0000 0UUU UUUU)
; d1: dfrac (.......................................)
; d3: $7f
; d4: light table aligned to 256 byte boundary
; d2: translation table aligned to 256 byte boundary
; a3: SCREENWIDTH*4 (advance of a0 per pass of four rows)
; count & 3 selects the entry point into the unrolled loop and the
; matching negative bias for a0.
move.l d7,d6
and.w #3,d6
swap d0 ; swap decimals and fraction
swap d1
add.w .width_tab6(pc,d6.w*2),a0
lsr.w #2,d7
move.w .tmap_tab6(pc,d6.w*2),d6
and.w d3,d0
sub.w d1,d0
add.l d1,d0 ; setup the X flag
jmp .loop6(pc,d6.w)
cnop 0,4
.width_tab6
dc.w -3*SCREENWIDTH
dc.w -2*SCREENWIDTH
dc.w -1*SCREENWIDTH
dc.w 0
.tmap_tab6
dc.w .06-.loop6
dc.w .16-.loop6
dc.w .26-.loop6
dc.w .36-.loop6
; Per row: texel -> low byte of the translation table address (d2),
; translated byte -> low byte of the colormap address (d4), and the byte
; found there is copied to the destination row.
.loop6
.36
move.b (a1,d0.w),d2
move.l d2,a2
addx.l d1,d0
move.b (a2),d4
and.w d3,d0
move.l d4,a2
move.b (a2),(a0)
.26
move.b (a1,d0.w),d2
move.l d2,a2
addx.l d1,d0
move.b (a2),d4
and.w d3,d0
move.l d4,a2
move.b (a2),SCREENWIDTH(a0)
.16
move.b (a1,d0.w),d2
move.l d2,a2
addx.l d1,d0
move.b (a2),d4
and.w d3,d0
move.l d4,a2
move.b (a2),SCREENWIDTH*2(a0)
.06
move.b (a1,d0.w),d2
move.l d2,a2
addx.l d1,d0
move.b (a2),d4
and.w d3,d0
move.l d4,a2
move.b (a2),SCREENWIDTH*3(a0)
add.l a3,a0
.loop_end6
dbf d7,.loop6
.end6
movem.l (sp)+,d2-d4/d6-d7/a2/a3
rts
;-----------------------------------------------------------------------
cnop 0,4
; routine from j.selck@flensburg.netsurf.de (Aki's 040 routine is faster)
;_R_DrawColumn_030
;@R_DrawColumn_030
; movem.l d3-d7/a2-a5,-(sp)
; move.l _dc_yl(a4),d0
; move.l _dc_yh(a4),d7
; sub.l d0,d7
; bmi.b 1$
; move.l _dc_x(a4),d1
; lea _columnofs(a4),a5
; lea (a5,d1.l*4),a1
; lea _ylookup(a4),a5
; movea.l (a5,d0.l*4),a2
; adda.l (a1),a2
; move.l _dc_iscale(a4),d6
; sub.l _centery(a4),d0
; muls.l d6,d0
; move.l _dc_texturemid(a4),d5
; add.l d0,d5
; movea.l _dc_source(a4),a3
; movea.l _dc_colormap(a4),a4
; moveq #127,d4
; move.l #SCREENWIDTH,d3
; moveq #0,d1 ; ensure high bits of d1 are clear
; add.w d6,d5 ; frac += fracstep (also sets X flag)
; swap d5 ; swap(frac)
; swap d6 ; swap(fracstep)
; and.w d4,d5 ; (frac>>16)&127
;2$ move.b (a3,d5.w),d1 ; dc_source[(frac>>FRACBITS)&127]
; move.b (a4,d1.w),(a2) ; *dest = dc_colormap[d1]
; addx.l d6,d5 ; swap(frac += fracstep), use & set X
; adda.l d3,a2 ; dest += SCREENWIDTH
; and.w d4,d5 ; (frac>>16)&127
; dbra d7,2$
;1$ movem.l (sp)+,d3-d7/a2-a5
; rts
;-----------------------------------------------------------------------
cnop 0,4
; R_DrawColumn_060 - high detail textured column drawer scheduled for the
; 68060.
; Draws rows dc_yl..dc_yh of one column. Unlike the routines above, all
; variables are accessed with absolute addressing instead of a4-relative.
; First loop: draws (count & 3) + 1 rows one at a time.
; Main loop: draws four rows per pass using two interleaved texture
; positions (d0 = frac, d2 = frac + step, both advanced by 2*step).
; Returns without drawing when dc_yh < dc_yl.
; Saves/restores d2-d3/d5-d7/a2/a3; d0, d1, a0 and a1 are overwritten.
_R_DrawColumn_060
@R_DrawColumn_060
movem.l d2-d3/d5-d7/a2/a3,-(sp)
move.l (_dc_yh),d7 ; count = _dc_yh - _dc_yl
move.l (_dc_yl),d0
sub.l d0,d7
bmi.w .end7
move.l (_dc_x),d1 ; dest = ylookup[_dc_yl] + columnofs[_dc_x]
lea (_ylookup),a0
move.l (a0,d0.l*4),a0
lea (_columnofs),a1
add.l (a1,d1.l*4),a0
move.l (_dc_colormap),a2
move.l (_dc_source),a1
move.l (_dc_iscale),d1 ; frac = _dc_texturemid + (_dc_yl-centery)*fracstep
sub.l (_centery),d0
muls.l d1,d0
add.l (_dc_texturemid),d0
moveq #$7f,d3
move.l #SCREENWIDTH,a3
; NOTE(review): and.w clears only the low word of d6; the main loop later
; uses d6 as a long index, so this relies on the upper word of the count
; in d7 being zero.
move.l d7,d6 ; Do the leftover iterations in
and.w #3,d6 ; this loop.
addq.w #1,d6
.skip_loop7
move.l d0,d5
swap d5
and.l d3,d5
move.b (a1,d5.w),d5
add.l d1,d0
move.b (a2,d5.w),(a0)
add.l a3,a0
subq.w #1,d6
bne.b .skip_loop7
; Register plan for the main loop:
; d7: cnt >> 2
; a0: chunky
; a1: texture
; a2: light_table
; d0: frac (uuuu uuuu uuuu uuuu 0000 0000 0UUU UUUU)
; d1: dfrac*2 (.......................................)
; d2: frac+dfrac(.......................................)
; d3: $7f
; a3: SCREENWIDTH here, scaled to SCREENWIDTH*4 below
.skip7
lsr.l #2,d7
subq.l #1,d7
bmi.b .end7
add.l a3,a3
move.l d0,d2
add.l a3,a3
add.l d1,d2
add.l d1,d1
eor.w d0,d2 ; swap the fraction part for addx
eor.w d2,d0 ; assuming 16.16 fixed point
eor.w d0,d2
swap d0 ; swap decimals and fraction
swap d1
swap d2
moveq #0,d5
and.w d3,d2
and.w d3,d0
sub.w d1,d0
add.l d1,d0 ; setup the X flag
move.b (a1,d2.w),d5
.loop7
; This should be reasonably scheduled for
; m68060. It should perform well on other processors
; too. That AGU stall still bothers me though.
move.b (a1,d0.w),d6 ; stall + pOEP but allows sOEP
addx.l d1,d2 ; pOEP only
move.b (a2,d5.l),d5 ; pOEP but allows sOEP
and.w d3,d2 ; sOEP
move.b (a2,d6.l),d6 ; pOEP but allows sOEP
move.b d5,SCREENWIDTH(a0) ; sOEP
addx.l d1,d0 ; pOEP only
move.b (a1,d2.w),d5 ; pOEP but allows sOEP
and.w d3,d0 ; sOEP
move.b d6,(a0) ; pOEP
; = ~4 cycles/pixel
; + cache misses
; The vertical writes are the true timehog of the loop
; because of the characteristics of the copyback cache
; operation.
; Better mark the chunky buffer as write through
; with the MMU and have all the horizontal writes
; be longs aligned to longword boundary.
move.b (a1,d0.w),d6
addx.l d1,d2
move.b (a2,d5.l),d5
and.w d3,d2
move.b (a2,d6.l),d6
move.b d5,SCREENWIDTH*3(a0)
addx.l d1,d0
move.b (a1,d2.w),d5
and.w d3,d0
move.b d6,SCREENWIDTH*2(a0)
add.l a3,a0
.loop_end7
dbf d7,.loop7
; it's faster to divide it to two lines on 060
; and shouldn't be slower on 040.
; move.b (a1,d0.w),d6 ; new
; move.b (a2,d6.l),d6 ; new
; move.b d6,(a0) ; new
.end7
movem.l (sp)+,d2-d3/d5-d7/a2/a3
rts
;-----------------------------------------------------------------------
cnop 0,4
; 040 version
; R_DrawColumn_040 - high detail textured column drawer.
; Draws rows dc_yl..dc_yh of one column, one byte per row.
; Reads (a4-relative): dc_yl, dc_yh, dc_x, dc_iscale, dc_texturemid,
; dc_source, dc_colormap, centery, ylookup[], columnofs[].
; Returns without drawing when dc_yh < dc_yl.
; Saves/restores d3-d4/d6-d7/a2/a3; d0, d1, a0 and a1 are overwritten.
_R_DrawColumn_040
@R_DrawColumn_040
movem.l d3-d4/d6-d7/a2/a3,-(sp)
move.l _dc_yh(a4),d7 ; count = _dc_yh - _dc_yl
move.l _dc_yl(a4),d0
sub.l d0,d7
bmi.w .end8
move.l _dc_x(a4),d1 ; dest = ylookup[_dc_yl] + columnofs[_dc_x]
lea _ylookup(a4),a0
move.l (a0,d0.l*4),a0
lea _columnofs(a4),a1
add.l (a1,d1.l*4),a0
move.l _dc_colormap(a4),d4
move.l _dc_source(a4),a1
move.l _dc_iscale(a4),d1 ; frac = _dc_texturemid + (_dc_yl-centery)*fracstep
sub.l _centery(a4),d0
muls.l d1,d0
add.l _dc_texturemid(a4),d0
moveq #$7f,d3
lea (SCREENWIDTH*4).w,a3
; Register plan for the unrolled loop (after the swaps below):
; d7: cnt >> 2
; a0: chunky
; a1: texture
; d0: frac (uuuu uuuu uuuu uuuu 0000 0000 0UUU UUUU)
; d1: dfrac (.......................................)
; d3: $7f
; d4: light table aligned to 256 byte boundary
; a3: SCREENWIDTH*4 (advance of a0 per pass of four rows)
; count & 3 selects the entry point into the unrolled loop and the
; matching negative bias for a0.
move.l d7,d6
and.w #3,d6
swap d0 ; swap decimals and fraction
swap d1
add.w .width_tab8(pc,d6.w*2),a0
lsr.w #2,d7
move.w .tmap_tab8(pc,d6.w*2),d6
and.w d3,d0
sub.w d1,d0
add.l d1,d0 ; setup the X flag
jmp .loop8(pc,d6.w)
cnop 0,4
.width_tab8
dc.w -3*SCREENWIDTH
dc.w -2*SCREENWIDTH
dc.w -1*SCREENWIDTH
dc.w 0
.tmap_tab8
dc.w .08-.loop8
dc.w .18-.loop8
dc.w .28-.loop8
dc.w .38-.loop8
; Per row: the texel byte replaces the low byte of the colormap address
; in d4; the byte at that address is copied to the destination row.
; addx.l advances the texture position, and.w wraps the integer part to
; 0..127.
.loop8
.38
move.b (a1,d0.w),d4
addx.l d1,d0
move.l d4,a2
and.w d3,d0
move.b (a2),(a0)
.28
move.b (a1,d0.w),d4
addx.l d1,d0
move.l d4,a2
and.w d3,d0
move.b (a2),SCREENWIDTH(a0)
.18
move.b (a1,d0.w),d4
addx.l d1,d0
move.l d4,a2
and.w d3,d0
move.b (a2),SCREENWIDTH*2(a0)
.08
move.b (a1,d0.w),d4
addx.l d1,d0
move.l d4,a2
and.w d3,d0
move.b (a2),SCREENWIDTH*3(a0)
add.l a3,a0
.loop_end8
dbf d7,.loop8
.end8
movem.l (sp)+,d3-d4/d6-d7/a2/a3
rts
;-----------------------------------------------------------------------
; This faster version by Aki M Laukkanen <amlaukka@cc.helsinki.fi>
; R_DrawSpan_060 - high detail textured span drawer scheduled for the 68060.
; Draws columns ds_x1..ds_x2 of row ds_y, one byte per column. The texel
; index is ((y & 63) << 6) | (x & 63). All variables are accessed with
; absolute addressing.
; Structure: an optional single pixel if the destination is odd, an
; optional pixel pair to reach longword alignment, a main loop storing
; four pixels per long write, and a byte loop for the last count & 3
; pixels.
; Saves/restores d2-d7/a2/a3; d0, d1, a0 and a1 are overwritten.
cnop 0,4
_R_DrawSpan_060
@R_DrawSpan_060
movem.l d2-d7/a2/a3,-(sp)
move.l (_ds_y),d0
move.l (_ds_x1),d1 ; dest = ylookup[_ds_y] + columnofs[_ds_x1]
lea (_ylookup),a0
move.l (a0,d0.l*4),a0
lea (_columnofs),a1
add.l (a1,d1.l*4),a0
move.l (_ds_source),a1
move.l (_ds_colormap),a2
move.l (_ds_x2),d7 ; count = _ds_x2 - _ds_x1
sub.l d1,d7
addq.l #1,d7
move.l (_ds_xfrac),d0
move.l (_ds_yfrac),d1
move.l (_ds_xstep),d2
move.l (_ds_ystep),d3
move.l a0,d4
btst #0,d4
beq.b .skipb9
move.l d0,d5 ; do the unaligned pixels
move.l d1,d6 ; so we can write to longword
swap d5 ; boundary in the main loop
swap d6
and.w #$3f,d5
and.w #$3f,d6
lsl.w #6,d6
or.w d5,d6
move.b (a1,d6.w),d5
add.l d2,d0
move.b (a2,d5.w),(a0)+
add.l d3,d1
move.l a0,d4
subq.l #1,d7
; the pixel pair is written only when more than two pixels remain
.skipb9 btst #1,d4
beq.b .skips9
moveq #2,d4
cmp.l d4,d7
bls.b .skips9
move.l d0,d5 ; write two pixels
move.l d1,d6
swap d5
swap d6
and.w #$3f,d5
and.w #$3f,d6
lsl.w #6,d6
or.w d5,d6
move.b (a1,d6.w),d5
move.w (a2,d5.w),d4
add.l d2,d0
add.l d3,d1
move.l d0,d5
move.l d1,d6
swap d5
swap d6
and.w #$3f,d5
and.w #$3f,d6
lsl.w #6,d6
or.w d5,d6
move.b (a1,d6.w),d5
move.b (a2,d5.w),d4
add.l d2,d0
move.w d4,(a0)+
add.l d3,d1
subq.l #2,d7
; a3 keeps count & 3 for the byte loop at the end
.skips9 move.l d7,d6 ; setup registers
and.w #3,d6
move.l d6,a3
eor.w d0,d1 ; swap fraction parts for addx
eor.w d2,d3
eor.w d1,d0
eor.w d3,d2
eor.w d0,d1
eor.w d2,d3
swap d0
swap d1
swap d2
swap d3
lsl.w #6,d1
lsl.w #6,d3
moveq #0,d6
moveq #0,d5
; The index word built below always has bits 12-15 set ($f000) and is
; used as a long index with a zero upper word, so the texture pointer is
; biased by -$f000.
sub.l #$f000,a1
lsr.l #2,d7
beq.w .skip_loop29
subq.l #1,d7
sub.w d3,d1
add.l d3,d1 ; setup the X flag
or.w #$ffc0,d0
or.w #$f03f,d1
move.w d0,d6
and.w d1,d6
bra.b .start_loop29
cnop 0,8
; Main loop: d4 collects four shaded pixels (move.w/move.b/swap) and is
; stored at the top of the next pass, or after the loop for the last pass.
.loop29 or.w #$ffc0,d0 ; pOEP
or.w #$f03f,d1 ; sOEP
move.b (a2,d5.l),d4 ; pOEP but allows sOEP
move.w d0,d6 ; sOEP
and.w d1,d6 ; pOEP
move.l d4,(a0)+ ; sOEP
.start_loop29
addx.l d2,d0 ; pOEP only
addx.l d3,d1 ; pOEP only
move.b (a1,d6.l),d5 ; pOEP but allows sOEP
or.w #$ffc0,d0 ; sOEP
or.w #$f03f,d1 ; pOEP
move.w d0,d6 ; sOEP
move.w (a2,d5.l),d4 ; pOEP but allows sOEP
and.w d1,d6 ; sOEP
addx.l d2,d0 ; pOEP only
addx.l d3,d1 ; pOEP only
move.b (a1,d6.l),d5 ; pOEP but allows sOEP
or.w #$ffc0,d0 ; sOEP
or.w #$f03f,d1 ; pOEP
move.w d0,d6 ; sOEP
move.b (a2,d5.l),d4 ; pOEP but allows sOEP
and.w d1,d6 ; sOEP
addx.l d2,d0 ; pOEP only
addx.l d3,d1 ; pOEP only
move.b (a1,d6.l),d5 ; pOEP but allows sOEP
or.w #$ffc0,d0 ; sOEP
or.w #$f03f,d1 ; pOEP
move.w d0,d6 ; sOEP
swap d4 ; pOEP only
move.w (a2,d5.l),d4 ; pOEP but allows sOEP
and.w d1,d6 ; sOEP
addx.l d2,d0 ; pOEP only
addx.l d3,d1 ; pOEP only
move.b (a1,d6.l),d5 ; pOEP but allows sOEP
dbf d7,.loop29 ; pOEP only = 7.75 cycles/pixel
move.b (a2,d5.l),d4
move.l d4,(a0)+
.skip_loop29
sub.w d3,d1
add.l d3,d1
move.l a3,d7
bra.b .loop_end29
; Byte loop for the remaining count & 3 pixels.
.loop39 or.w #$ffc0,d0
or.w #$f03f,d1
move.w d0,d6
and.w d1,d6
addx.l d2,d0
addx.l d3,d1
move.b (a1,d6.l),d5
move.b (a2,d5.l),(a0)+
.loop_end29
dbf d7,.loop39
.end29 movem.l (sp)+,d2-d7/a2/a3
rts
cnop 0,4
;-----------------------------------------------------------------------
; 030/040 version
; R_DrawSpan_040 - high detail textured span drawer.
; Draws columns ds_x1..ds_x2 of row ds_y, one byte per column. The texel
; index is ((y & 63) << 6) | (x & 63).
; Reads (a4-relative): ds_y, ds_x1, ds_x2, ds_source, ds_colormap,
; ds_xfrac, ds_yfrac, ds_xstep, ds_ystep, ylookup[], columnofs[].
; Structure: an optional single pixel if the destination is odd, an
; optional pixel pair to reach longword alignment, a main loop storing
; four pixels per long write, and a byte loop for the rest.
; Saves/restores d2-d7/a2-a4. a4 is reused as a loop end pointer once all
; variables have been loaded. d0, d1, a0 and a1 are overwritten.
_R_DrawSpan_040
@R_DrawSpan_040
movem.l d2-d7/a2-a4,-(sp)
move.l _ds_y(a4),d0
move.l _ds_x1(a4),d1 ; dest = ylookup[_ds_y] + columnofs[_ds_x1]
lea _ylookup(a4),a0
move.l (a0,d0.l*4),a0
lea _columnofs(a4),a1
add.l (a1,d1.l*4),a0
move.l _ds_source(a4),a1
move.l _ds_colormap(a4),a2
move.l _ds_x2(a4),d7 ; count = _ds_x2 - _ds_x1
sub.l d1,d7
addq.l #1,d7
move.l _ds_xfrac(a4),d0
move.l _ds_yfrac(a4),d1
move.l _ds_xstep(a4),d2
move.l _ds_ystep(a4),d3
move.l a0,d4
btst #0,d4
beq.b .skipb0
move.l d0,d5 ; do the unaligned pixels
move.l d1,d6 ; so we can write to longword
swap d5 ; boundary in the main loop
swap d6
and.w #$3f,d5
and.w #$3f,d6
lsl.w #6,d6
or.w d5,d6
move.b (a1,d6.w),d5
add.l d2,d0
move.b (a2,d5.w),(a0)+
add.l d3,d1
move.l a0,d4
subq.l #1,d7
; the pixel pair is written only when more than two pixels remain
.skipb0 btst #1,d4
beq.b .skips0
moveq #2,d4
cmp.l d4,d7
bls.b .skips0
move.l d0,d5 ; write two pixels
move.l d1,d6
swap d5
swap d6
and.w #$3f,d5
and.w #$3f,d6
lsl.w #6,d6
or.w d5,d6
move.b (a1,d6.w),d5
move.w (a2,d5.w),d4
add.l d2,d0
add.l d3,d1
move.l d0,d5
move.l d1,d6
swap d5
swap d6
and.w #$3f,d5
and.w #$3f,d6
lsl.w #6,d6
or.w d5,d6
move.b (a1,d6.w),d5
move.b (a2,d5.w),d4
add.l d2,d0
move.w d4,(a0)+
add.l d3,d1
subq.l #2,d7
; d4 = colormap address; its low byte is replaced by each texel in the
; loops below
.skips0 move.l a2,d4
; The index word built in the loops always has bits 12-15 set ($f000)
; and is used as a signed 16 bit index, i.e. offset - $1000; the texture
; pointer is biased by +$1000 to compensate.
add.l #$1000,a1 ; catch 22
; a3 = end of the span, a4 = end of the part handled by the unrolled loop
; (count rounded down to a multiple of 4)
move.l a0,a3
add.l d7,a3
move.l d7,d5
and.b #~3,d5
move.l a0,a4
add.l d5,a4
eor.w d0,d1 ; swap fraction parts for addx
eor.w d2,d3
eor.w d1,d0
eor.w d3,d2
eor.w d0,d1
eor.w d2,d3
swap d0
swap d1
swap d2
swap d3
lsl.w #6,d1
lsl.w #6,d3
move.w #$ffc0,d6
move.w #$f03f,d7
lsr.w #2,d5
beq.b .skip_loop20
sub.w d2,d0
add.l d2,d0 ; setup the X flag
; Unrolled loop: d5 collects four shaded pixels (move.w/move.b/swap) that
; are stored with one long write per pass.
.loop20 or.w d6,d0
or.w d7,d1
and.w d1,d0
addx.l d3,d1
move.b (a1,d0.w),d4
addx.l d2,d0
move.l d4,a2
move.w (a2),d5
or.w d6,d0
or.w d7,d1
and.w d1,d0
addx.l d3,d1
move.b (a1,d0.w),d4
addx.l d2,d0
move.l d4,a2
move.b (a2),d5
swap d5
or.w d6,d0
or.w d7,d1
and.w d1,d0
addx.l d3,d1
move.b (a1,d0.w),d4
addx.l d2,d0
move.l d4,a2
move.w (a2),d5
or.w d6,d0
or.w d7,d1
and.w d1,d0
addx.l d3,d1
move.b (a1,d0.w),d4
addx.l d2,d0
move.l d4,a2
move.b (a2),d5
move.l d5,(a0)+
cmp.l a0,a4
bne.b .loop20
.skip_loop20
sub.w d2,d0
add.l d2,d0
bra.b .loop_end20
; Tail loop: one pixel per pass until a0 reaches a3.
.loop30 or.w d6,d0
or.w d7,d1
and.w d1,d0
addx.l d3,d1
move.b (a1,d0.w),d4
addx.l d2,d0
move.l d4,a2
move.b (a2),(a0)+
.loop_end20
cmp.l a0,a3
bne.b .loop30
.end20 movem.l (sp)+,d2-d7/a2-a4
rts
;-----------------------------------------------------------------------
xref _segtextured
xref _markfloor
xref _markceiling
xref _maskedtexture
xref _maskedtexturecol
xref _toptexture
xref _bottomtexture
xref _midtexture
xref _rw_x
xref _rw_stopx
xref _rw_centerangle
xref _rw_offset
xref _rw_distance
xref _rw_scale
xref _rw_scalestep
xref _rw_midtexturemid
xref _rw_toptexturemid
xref _rw_bottomtexturemid
xref _pixhigh
xref _pixlow
xref _pixhighstep
xref _pixlowstep
xref _topfrac
xref _topstep
xref _bottomfrac
xref _bottomstep
xref _walllights
xref _ceilingclip
xref _ceilingplane
xref _floorclip
xref _floorplane
xref _xtoviewangle
xref _finetangent
xref _FixedMul
xref _colfunc
xref @R_GetColumn
; R_RenderSegLoop - walk one wall segment column by column.
;
; For every rw_x in [rw_x, rw_stopx) the loop
;   1. derives the clipped wall extent yl..yh from topfrac/bottomfrac and
;      the ceilingclip[]/floorclip[] tables,
;   2. records ceiling and floor plane extents when markceiling/markfloor
;      are set,
;   3. sets up dc_colormap, dc_x and dc_iscale when segtextured is set,
;   4. draws the mid, top and bottom tiers through colfunc, fetching the
;      column data with R_GetColumn, and updates the clip tables,
;   5. stores the texture column in maskedtexturecol[] when maskedtexture
;      is set.
;
; Loop-carried values are kept in address registers and written back on
; exit: a2 = rw_x, a3 = topfrac, a5 = bottomfrac, a6 = rw_scale.
; Within one column: d7 = yl, d3 = yh, d5 = texturecolumn, d6 = mid.
; Variables are read relative to a4, except finetangent (absolute).
; Saves/restores d2-d7/a2-a6.

VISPLANE_TOP	equ	$15		; byte offset of top[] in a plane record
VISPLANE_BOTTOM	equ	$157		; byte offset of bottom[] in a plane record
MAXLIGHTSCALE	equ	$30		; number of walllights[] entries used

	cnop	0,4
_R_RenderSegLoop
@R_RenderSegLoop
	movem.l	d2-d7/a2-a6,-(sp)
	movea.l	_rw_x(a4),a2		; loop-carried: rw_x
	movea.l	_topfrac(a4),a3		; loop-carried: topfrac
	movea.l	_bottomfrac(a4),a5	; loop-carried: bottomfrac
	movea.l	_rw_scale(a4),a6	; loop-carried: rw_scale
	bra.w	.sl_test		; test the bound before the first column

; ---- yl = max(ceil(topfrac / 4096), ceilingclip[rw_x] + 1) -------------
.sl_column
	move.l	a2,d0			; d0 = rw_x, used as table index
	move.l	a3,d7
	lea	_ceilingclip(a4),a0	; a0 stays on ceilingclip for a while
	subq.l	#1,d7
	move.w	(a0,d0.l*2),d3
	asr.l	#8,d7
	ext.l	d3
	asr.l	#4,d7			; 8 + 4 = shift by 12 in total
	addq.l	#1,d3			; d3 = top = ceilingclip[rw_x] + 1
	addq.l	#1,d7			; d7 = yl = ((topfrac - 1) >> 12) + 1
	cmp.l	d3,d7
	bge	.sl_yl_ok
	move.l	d3,d7			; no room above the wall: yl = top
.sl_yl_ok

; ---- ceiling plane extent ----------------------------------------------
	tst.l	_markceiling(a4)
	beq.b	.sl_find_yh
	lea	_floorclip(a4),a1
	move.l	d7,d4
	move.w	(a1,d0.l*2),d1
	subq.l	#1,d4			; d4 = bottom = yl - 1
	ext.l	d1			; d1 = floorclip[rw_x]
	cmp.l	d1,d4
	blt.b	.sl_ceil_bottom_ok
	move.l	d1,d4
	subq.l	#1,d4			; bottom = floorclip[rw_x] - 1
.sl_ceil_bottom_ok
	cmp.l	d4,d3			; only a non-empty range is recorded
	bgt.b	.sl_find_yh
	movea.l	_ceilingplane(a4),a1
	adda.l	d0,a1			; index the byte arrays by rw_x
	move.b	d3,VISPLANE_TOP(a1)	; ceilingplane->top[rw_x] = top
	move.b	d4,VISPLANE_BOTTOM(a1)	; ceilingplane->bottom[rw_x] = bottom

; ---- yh = min(bottomfrac >> 12, floorclip[rw_x] - 1) -------------------
.sl_find_yh
	move.l	a5,d3
	lea	_floorclip(a4),a1
	asr.l	#8,d3
	move.w	(a1,d0.l*2),d1
	asr.l	#4,d3			; d3 = yh = bottomfrac >> 12
	ext.l	d1			; d1 = floorclip[rw_x]
	cmp.l	d1,d3
	blt.b	.sl_yh_ok
	move.l	d1,d3
	subq.l	#1,d3			; yh = floorclip[rw_x] - 1
.sl_yh_ok

; ---- floor plane extent ------------------------------------------------
	tst.l	_markfloor(a4)
	beq.b	.sl_texture
	move.l	d3,d4
	move.w	(a0,d0.l*2),d2		; a0 still points at ceilingclip
	addq.l	#1,d4			; d4 = top = yh + 1
	ext.l	d2			; d2 = ceilingclip[rw_x]
	subq.l	#1,d1			; d1 = bottom = floorclip[rw_x] - 1
	cmp.l	d2,d4
	bgt.b	.sl_floor_top_ok
	move.l	d2,d4
	addq.l	#1,d4			; top = ceilingclip[rw_x] + 1
.sl_floor_top_ok
	cmp.l	d1,d4			; only a non-empty range is recorded
	bgt.b	.sl_texture
	movea.l	_floorplane(a4),a1
	adda.l	d0,a1
	move.b	d4,VISPLANE_TOP(a1)	; floorplane->top[rw_x] = top
	move.b	d1,VISPLANE_BOTTOM(a1)	; floorplane->bottom[rw_x] = bottom

; ---- texture column and lighting (shared by all tiers) -----------------
.sl_texture
	tst.l	_segtextured(a4)
	beq.b	.sl_tiers
	lea	_xtoviewangle(a4),a0
	move.l	_rw_centerangle(a4),d1
	add.l	(a0,d0.l*4),d1		; rw_centerangle + xtoviewangle[rw_x]
	swap	d1
	lea	(_finetangent),a0
	lsr.w	#3,d1			; swap + 3 = shift right by 19: angle
	move.l	(a0,d1.w*4),d0		; d0 = finetangent[angle]
	movea.l	_FixedMul(a4),a0	; FixedMul is called through a pointer
	move.l	_rw_distance(a4),d1
	jsr	(a0)			; d0 = FixedMul(d0, rw_distance)
	move.l	_rw_offset(a4),d5
	move.l	a6,d4
	sub.l	d0,d5			; rw_offset - product
	asr.l	#8,d4
	swap	d5
	asr.l	#4,d4			; d4 = index = rw_scale >> 12
	ext.l	d5			; d5 = texturecolumn (upper word, sign extended)
	moveq	#MAXLIGHTSCALE,d2
	cmp.l	d2,d4			; unsigned comparison
	bcs.b	.sl_index_ok
	moveq	#MAXLIGHTSCALE-1,d4	; clamp to the last entry
.sl_index_ok
	movea.l	_walllights(a4),a0
	moveq	#-1,d0			; dividend $ffffffff
	move.l	(a0,d4.l*4),_dc_colormap(a4)	; dc_colormap = walllights[index]
	move.l	a2,_dc_x(a4)		; dc_x = rw_x
	move.l	a6,d1
	divu.l	d1,d0
	move.l	d0,_dc_iscale(a4)	; dc_iscale = $ffffffff / rw_scale

; ---- single-sided wall: one tier covers yl..yh -------------------------
.sl_tiers
	move.l	_midtexture(a4),d0	; d0 doubles as the R_GetColumn argument
	beq.b	.sl_two_sided
	move.l	d7,_dc_yl(a4)
	move.l	d3,_dc_yh(a4)
	move.l	_rw_midtexturemid(a4),_dc_texturemid(a4)
	move.l	d5,d1			; d1 = texturecolumn
	jsr	(@R_GetColumn)
	move.l	d0,_dc_source(a4)
	movea.l	_colfunc(a4),a0
	jsr	(a0)			; draw through the colfunc pointer
	move.l	a2,d0
	move.l	_viewheight(a4),d1
	lea	_ceilingclip(a4),a0
	move.w	d1,(a0,d0.l*2)		; ceilingclip[rw_x] = viewheight
	lea	_floorclip(a4),a0
	move.w	#$ffff,(a0,d0.l*2)	; floorclip[rw_x] = -1
	bra.w	.sl_next

; ---- two-sided wall: top tier ------------------------------------------
.sl_two_sided
	move.l	_toptexture(a4),d0
	beq.b	.sl_no_top
	move.l	_pixhighstep(a4),d1
	move.l	_pixhigh(a4),d2		; value before stepping
	add.l	d1,_pixhigh(a4)		; pixhigh += pixhighstep
	lea	_floorclip(a4),a0
	asr.l	#8,d2
	move.l	a2,d1
	move.w	(a0,d1.l*2),d1
	asr.l	#4,d2
	ext.l	d1			; d1 = floorclip[rw_x]
	move.l	d2,d6			; d6 = mid = pixhigh >> 12
	cmp.l	d1,d6
	blt.b	.sl_top_mid_ok
	move.l	d1,d6
	subq.l	#1,d6			; mid = floorclip[rw_x] - 1
.sl_top_mid_ok
	cmp.l	d7,d6
	blt.b	.sl_ceil_to_yl		; mid < yl: nothing to draw
	move.l	d7,_dc_yl(a4)
	move.l	d6,_dc_yh(a4)
	move.l	_rw_toptexturemid(a4),_dc_texturemid(a4)
	move.l	d5,d1			; d0 = toptexture, d1 = texturecolumn
	jsr	(@R_GetColumn)
	move.l	d0,_dc_source(a4)
	movea.l	_colfunc(a4),a0
	jsr	(a0)
	move.l	a2,d0
	lea	_ceilingclip(a4),a0
	move.w	d6,(a0,d0.l*2)		; ceilingclip[rw_x] = mid
	bra.b	.sl_bottom
.sl_no_top
	tst.l	_markceiling(a4)
	beq.b	.sl_bottom
.sl_ceil_to_yl
	subq.l	#1,d7
	move.l	a2,d0
	lea	_ceilingclip(a4),a0
	move.w	d7,(a0,d0.l*2)		; ceilingclip[rw_x] = yl - 1

; ---- two-sided wall: bottom tier ---------------------------------------
.sl_bottom
	move.l	_bottomtexture(a4),d0
	beq.b	.sl_no_bottom
	move.l	_pixlow(a4),d6
	move.l	d6,d1
	lea	_ceilingclip(a4),a0
	add.l	_pixlowstep(a4),d1
	subq.l	#1,d6
	move.l	d1,_pixlow(a4)		; pixlow += pixlowstep
	asr.l	#8,d6
	move.l	a2,d1
	asr.l	#4,d6
	move.w	(a0,d1.l*2),d1
	addq.l	#1,d6			; d6 = mid = ((pixlow - 1) >> 12) + 1
	ext.l	d1			; d1 = ceilingclip[rw_x]
	cmp.l	d1,d6
	bgt.b	.sl_bot_mid_ok
	move.l	d1,d6
	addq.l	#1,d6			; mid = ceilingclip[rw_x] + 1
.sl_bot_mid_ok
	cmp.l	d3,d6
	bgt.b	.sl_floor_to_yh		; mid > yh: nothing to draw
	move.l	d6,_dc_yl(a4)
	move.l	d3,_dc_yh(a4)
	move.l	_rw_bottomtexturemid(a4),_dc_texturemid(a4)
	move.l	d5,d1			; d0 = bottomtexture, d1 = texturecolumn
	jsr	(@R_GetColumn)
	move.l	d0,_dc_source(a4)
	movea.l	_colfunc(a4),a0
	jsr	(a0)
	move.l	a2,d0
	lea	_floorclip(a4),a0
	move.w	d6,(a0,d0.l*2)		; floorclip[rw_x] = mid
	bra.b	.sl_masked
.sl_no_bottom
	tst.l	_markfloor(a4)
	beq.b	.sl_masked
.sl_floor_to_yh
	addq.l	#1,d3
	move.l	a2,d0
	lea	_floorclip(a4),a0
	move.w	d3,(a0,d0.l*2)		; floorclip[rw_x] = yh + 1

; ---- remember the texture column for masked mid textures ---------------
.sl_masked
	tst.l	_maskedtexture(a4)
	beq.b	.sl_next
	move.l	a2,d0
	movea.l	_maskedtexturecol(a4),a0
	move.w	d5,(a0,d0.l*2)		; maskedtexturecol[rw_x] = texturecolumn

; ---- step to the next column -------------------------------------------
.sl_next
	adda.l	_rw_scalestep(a4),a6
	adda.l	_topstep(a4),a3
	adda.l	_bottomstep(a4),a5
	addq.l	#1,a2
.sl_test
	cmpa.l	_rw_stopx(a4),a2
	blt.w	.sl_column		; signed: continue while rw_x < rw_stopx

	; write the loop-carried values back to their variables
	move.l	a2,_rw_x(a4)
	move.l	a3,_topfrac(a4)
	move.l	a5,_bottomfrac(a4)
	move.l	a6,_rw_scale(a4)
	movem.l	(sp)+,d2-d7/a2-a6
	rts
;***********************************************************************
end